# Start from a clean workspace: remove every object in the current environment.
rm(list = ls())

# Placeholder path: replace it with the local folder holding the replication
# materials. The relative './data/...' input path and the './*.tex' / './*.pdf'
# output paths used further down are resolved against this directory.
setwd('/PATH TO REPLICATION MATERIALS')

library(data.table)
library(estimatr)
library(texreg)
library(xtable)
library(plyr)
library(dplyr)
library(haven)
library(car)
library(ggplot2)
library(hrbrthemes)

# Read the merged two-wave survey file (SPSS format) into a data.table and
# lower-case every column name so the rest of the script can use one spelling.
data <- data.table(read_sav('./data/UNCA0002_w2_MERGED_OUTPUT.sav'))
setnames(data, tolower(names(data)))

#### Clean Data ####

### Treatment indicator `biden`
# biden = 1: answered 1 on any of the four vote-choice items AND pid3 == 1
#            (Democratic Biden supporters, the treatment group)
# biden = 0: answered 2 on any of the four vote-choice items AND pid3 in 2..4
#            (non-Democratic Trump supporters, the control group)
# Respondents matching neither rule keep biden = NA.
data[
  (preschoice == 1 | preschoice2 == 1 | presforcechoice == 1 | presforcechoice2 == 1) &
    pid3 == 1,
  biden := 1
]
data[
  (preschoice == 2 | preschoice2 == 2 | presforcechoice == 2 | presforcechoice2 == 2) &
    (pid3 >= 2 & pid3 <= 4),
  biden := 0
]


### define control variables
# All recodes below modify `data` in place. Several of them rely on statement
# order (a later line only fills rows that an earlier line left NA).

# pid: dummies built from the 7-point party id scale (pid7);
# pid7 == 98 is treated as missing for every dummy
data[,democrat:=ifelse(pid7<=2,1,0)]
data[pid7==98, democrat:=NA]
data[,strong_democrat:=ifelse(pid7==1,1,0)]
data[pid7==98, strong_democrat:=NA]
data[,republican:=ifelse(pid7>=6 & pid7<=7,1,0)]
data[pid7==98, republican:=NA]
data[,strong_republican:=ifelse(pid7==7,1,0)]
data[pid7==98, strong_republican:=NA]
# immigrant: three-level code (1, 2, 3) derived from bornus and parbirth.
# Each rule after the first only fills rows that are still NA, so the order of
# these statements matters. bornus == 8 is set back to missing at the end.
# NOTE(review): presumably 1/2/3 = first/second/third+ generation -- confirm
# against the codebook for bornus and parbirth.
data[(bornus==3 & (parbirth>=2 & parbirth<=4)),immigrant:=1]
data[(bornus<=2 & parbirth==3 & is.na(immigrant)),immigrant:=2]
data[(bornus<=2 & is.na(immigrant)), immigrant:=3]
data[(bornus==3 & parbirth==1 & is.na(immigrant)), immigrant:=3]
data[bornus==8, immigrant:=NA]
# registered to vote: 1 if regis == 1, 0 otherwise; regis == 8 becomes missing
data[,registered:=ifelse(regis==1,1,0)]
data[registered==0 & regis==8, registered:=NA]
# ethnicity: recode hispanic from 1/2 to 1/0 (overwrites the original column)
data[hispanic==2,hispanic:=0]
# race: 1 if race_client == 1, 0 otherwise
data[,white:=ifelse(race_client==1,1,0)]
# gender: 1 if gender_client == 1, 0 otherwise; gender_client == 8 becomes missing
data[,male:=ifelse(gender_client==1,1,0)]
data[gender_client==8, male:=NA]
# age: years between birth year and 2020
data[,age:=2020-birthyr]
# education (no post secondary degree): 1 if educ <= 2
data[,high_school:=ifelse(educ<=2,1,0)]
#unemployment: 1 for employ codes 3 or 4, 0 for any other code below 98;
# codes of 98 and above stay NA
data[employ==3 | employ==4, unemployment:=1]
data[employ<3 | (employ>4 & employ<98), unemployment:=0]
# family income: code 97 is treated as missing (overwrites the original column)
data[faminc_new==97, faminc_new:=NA]
# coethnic network [0-2]: 2 if both friends and coworker equal 1, 0 if neither
# does, 1 for every remaining row.
# NOTE(review): the final line also assigns 1 to rows where friends or coworker
# is NA (the first two conditions evaluate to NA and select nothing) -- confirm
# this is intended.
data[friends==1 & coworker==1,coethnic_network:=2]
data[friends!=1  & coworker!=1,coethnic_network:=0]
data[is.na(coethnic_network), coethnic_network:=1]
# covid index: six variables about covid effects
# Each item is recoded to 1 (item == 1) / 0 (any other answer) and the six
# dummies are summed into covid_index (range 0-6).
# Code 8 is first set to missing: previously rows coded 8 were skipped by the
# recode but kept their raw value, so an 8 was added into the index. This
# matches how code 8 is handled for bornus, regis and gender_client in this
# script. Respondents with any missing item get covid_index = NA.
# NOTE(review): if any respondent is coded 8 on these items, statistics that use
# covid_index (attrition tables) will differ slightly from earlier runs.
data[pandemicemploy == 8, pandemicemploy := NA]
data[havecovid == 8, havecovid := NA]
data[covidsick == 8, covidsick := NA]
data[coviddied == 8, coviddied := NA]
data[covidtest == 8, covidtest := NA]
data[covidhospital == 8, covidhospital := NA]

data[!is.na(pandemicemploy), pandemicemploy := ifelse(pandemicemploy == 1, 1, 0)]
data[!is.na(havecovid), havecovid := ifelse(havecovid == 1, 1, 0)]
data[!is.na(covidsick), covidsick := ifelse(covidsick == 1, 1, 0)]
data[!is.na(coviddied), coviddied := ifelse(coviddied == 1, 1, 0)]
data[!is.na(covidtest), covidtest := ifelse(covidtest == 1, 1, 0)]
data[!is.na(covidhospital), covidhospital := ifelse(covidhospital == 1, 1, 0)]

data[, covid_index := pandemicemploy + havecovid + covidsick + coviddied + covidtest + covidhospital]
# swing states (Nate Silver's definition: https://en.wikipedia.org/wiki/Swing_state)
# respondents whose inputstate code is in the list get 1, everybody else 0
data[, swing_state := ifelse(inputstate %in% c(33,27,26,32,42,55,4,13,37,12,48), 1, 0)]

# political interest: among newsint codes below 7, 1 if newsint < 3, else 0;
# codes of 7 and above stay NA
data[newsint < 7, interest := ifelse(newsint < 3, 1, 0)]

# political participation (w2): for each item, codes 1/2 become 1/0 and any
# other code stays NA
data[polactive_1_w2 <= 2, donate := ifelse(polactive_1_w2 == 1, 1, 0)]
data[polactive_2_w2 <= 2, volunteer := ifelse(polactive_2_w2 == 1, 1, 0)]
data[polactive_3_w2 <= 2, protest := ifelse(polactive_3_w2 == 1, 1, 0)]

# id: running row number, used later as respondent identifier
# (cluster variable and individual fixed effect)
data[, id := seq_len(.N)]

### define pre and post election to reshape data for DID analysis
# data_pre: respondent-level covariates plus the wave-1 outcome items,
# flagged with post = 0.
data_pre <- data[,.(biden,democrat,republican,immigrant,hispanic,white,male,age,high_school,id,
                    coethnic_network, registered, unemployment, covid_index, 
                    blackid,aaid,latinoid,hispanicid,americanid,democratid,republicanid,independentid,
                    otherid,blackglad,aaglad,latinoglad,hispanicglad,americanglad,democratglad,republicanglad,independentglad,
                    otherglad,behavior,letter,letter_t, caseid, weight, swing_state, interest,
                    strong_democrat, strong_republican, faminc_new, educ)]
data_pre[,post:=0]

# data_post: the same covariates plus the wave-2 (`_w2`) outcome items,
# including the respect/voice/perspective/belonging/policy/value items that
# only appear with a `_w2` suffix here; flagged with post = 1.
# Note that caseid_w2 and weight_w2 keep their `_w2` names, so after stacking
# the post rows have no value in the `caseid` / `weight` columns.
data_post <- data[,.(biden,democrat,republican,immigrant,hispanic,white,male,age,high_school,id,
                     coethnic_network, registered, unemployment, covid_index,
                     blackid_w2,aaid_w2,latinoid_w2,hispanicid_w2,americanid_w2,democratid_w2,republicanid_w2,independentid_w2,
                     otherid_w2,blackglad_w2,aaglad_w2,latinoglad_w2,hispanicglad_w2,americanglad_w2,democratglad_w2,republicanglad_w2,independentglad_w2,
                     otherglad_w2,behavior1_a_w2,behavior2_a_w2,behavior1_b_w2,behavior2_b_w2, letter_state_w2, letter_dc_w2,
                     letter_t_state_w2, letter_t_dc_w2,blackrespect_w2,aarespect_w2,latinorespect_w2,hispanicrespect_w2,
                     demrespect_w2,represpect_w2,indrespect_w2,othrespect_w2,blackvoice_w2,aavoice_w2,latinovoice_w2,hispanicvoice_w2,
                     demvoice_w2,repvoice_w2,indvoice_w2,othvoice_w2,
                     blackpersp_w2,aapersp_w2,latinopersp_w2,hispanicpersp_w2,dempersp_w2,reppersp_w2,indpersp_w2,othrpersp_w2,
                     blackbelong_w2,aabelong_w2,latinobelong_w2,hispanicbelong_w2,dembelong_w2,repbelong_w2,indbelong_w2,othrbelong_w2,
                     blackpolicy_w2,aapolicy_w2,latinopolicy_w2,hispanicpolicy_w2,dempolicy_w2,reppolicy_w2,indpolicy_w2,othrpolicy_w2,
                     blackval_w2,aava_w2,latinoval_w2,hispanicval_w2,demval_w2,repval_w2,indval_w2,othrval_w2, caseid_w2, weight_w2,
                     swing_state, interest, strong_democrat, strong_republican, faminc_new, educ_w2)]
data_post[,post:=1]
# give wave-2 education the same column name as in data_pre
setnames(data_post, 'educ_w2', 'educ')

## define outcome variables
# Pre-election wave. Each group-specific item is used only when its code is
# below 98, and each later line only fills rows that are still NA, so the first
# valid answer in the order black -> aa -> latino -> hispanic (or
# democrat -> republican -> independent -> other) is kept.

# racial identification item ("identify as")
data_pre[blackid<98,race_id:=blackid]
data_pre[aaid<98 & is.na(race_id),race_id:=aaid]
data_pre[latinoid<98 & is.na(race_id),race_id:=latinoid]
data_pre[hispanicid<98 & is.na(race_id),race_id:=hispanicid]

# racial "glad to be" item
data_pre[blackglad<98,race_glad:=blackglad]
data_pre[aaglad<98 & is.na(race_glad),race_glad:=aaglad]
data_pre[latinoglad<98 & is.na(race_glad),race_glad:=latinoglad]
data_pre[hispanicglad<98 & is.na(race_glad),race_glad:=hispanicglad]

# attachment indices: sum of the two items (NA if either item is missing)
data_pre[,racial_id:=race_id+race_glad]
data_pre[americanid<98 & americanglad<98,national_id:=americanid+americanglad]

# partisan identification item
data_pre[democratid<98,party_id:=democratid]
data_pre[republicanid<98 & is.na(party_id),party_id:=republicanid]
data_pre[independentid<98 & is.na(party_id),party_id:=independentid]
data_pre[otherid<98 & is.na(party_id),party_id:=otherid]

# partisan "glad to be" item
data_pre[democratglad<98,party_glad:=democratglad]
data_pre[republicanglad<98 & is.na(party_glad),party_glad:=republicanglad]
data_pre[independentglad<98 & is.na(party_glad),party_glad:=independentglad]
data_pre[otherglad<98 & is.na(party_glad),party_glad:=otherglad]
data_pre[,partisan_id:=party_id+party_glad]

# civic participation: 1 only when behavior == 2, letter == 1 and the letter
# text is non-empty; 0 otherwise; behavior == 8 is treated as missing
data_pre[,civic_participation:=ifelse(behavior==2 & letter==1 & letter_t!='',1,0)]
data_pre[behavior==8, civic_participation:=NA]

# Post-election wave: same constructions as for the pre-election wave, built
# from the `_w2` items. A value is used only when it lies strictly between -1
# and 98, and each later line only fills rows that are still NA.

# racial identification item
data_post[(blackid_w2<98 & blackid_w2>-1),race_id:=blackid_w2]
data_post[(aaid_w2<98 & aaid_w2>-1) & is.na(race_id),race_id:=aaid_w2]
data_post[(latinoid_w2<98 & latinoid_w2>-1) & is.na(race_id),race_id:=latinoid_w2]
data_post[(hispanicid_w2<98 & hispanicid_w2>-1) & is.na(race_id),race_id:=hispanicid_w2]

# racial "glad to be" item
data_post[(blackglad_w2<98 & blackglad_w2>-1),race_glad:=blackglad_w2]
data_post[(aaglad_w2<98 & aaglad_w2>-1) & is.na(race_glad),race_glad:=aaglad_w2]
data_post[(latinoglad_w2<98 & latinoglad_w2>-1) & is.na(race_glad),race_glad:=latinoglad_w2]
data_post[(hispanicglad_w2<98 & hispanicglad_w2>-1) & is.na(race_glad),race_glad:=hispanicglad_w2]

# attachment indices: sum of the two items (NA if either item is missing)
data_post[,racial_id:=race_id+race_glad]
data_post[(americanid_w2<98 & americanid_w2>-1) & (americanglad_w2<98 & americanglad_w2>-1),national_id:=americanid_w2+americanglad_w2]

# partisan identification item
data_post[(democratid_w2<98 & democratid_w2>-1),party_id:=democratid_w2]
data_post[(republicanid_w2<98 & republicanid_w2>-1) & is.na(party_id),party_id:=republicanid_w2]
data_post[(independentid_w2<98 & independentid_w2>-1) & is.na(party_id),party_id:=independentid_w2]
data_post[(otherid_w2<98 & otherid_w2>-1) & is.na(party_id),party_id:=otherid_w2]

# partisan "glad to be" item
data_post[(democratglad_w2<98 & democratglad_w2>-1),party_glad:=democratglad_w2]
data_post[(republicanglad_w2<98 & republicanglad_w2>-1) & is.na(party_glad),party_glad:=republicanglad_w2]
data_post[(independentglad_w2<98 & independentglad_w2>-1) & is.na(party_glad),party_glad:=independentglad_w2]
data_post[(otherglad_w2<98 & otherglad_w2>-1) & is.na(party_glad),party_glad:=otherglad_w2]
data_post[,partisan_id:=party_id+party_glad]

# civic participation, built in three ordered steps:
#  1. set to 1 when any of the four behavior items equals 1, a letter item
#     equals 1 and some letter text is non-empty;
#  2. remaining rows where at least one behavior item differs from -1 get 0;
#  3. rows set to 0 in step 2 where any behavior item equals 8 go back to NA.
data_post[((behavior1_a_w2==1 | behavior2_a_w2==1 | behavior1_b_w2==1 | behavior2_b_w2==1) &
             (letter_state_w2==1 | letter_dc_w2==1) & (letter_t_state_w2!='' | letter_t_dc_w2!='')),
          civic_participation:=1]
data_post[(is.na(civic_participation) & ((behavior1_a_w2!=-1 | behavior2_a_w2!=-1 | behavior1_b_w2!=-1 | behavior2_b_w2!=-1))),
          civic_participation:=0]
data_post[(civic_participation==0 & (behavior1_a_w2==8 | behavior2_a_w2==8 | behavior1_b_w2==8 | behavior2_b_w2==8)),
          civic_participation:=NA]

# Outcomes built only from wave-2 items (respect, voice, perspective,
# belonging, policy, values). Same rule as above: a value is used only when it
# lies strictly between -1 and 98, and each later line only fills rows that are
# still NA.

# racial group: respect
data_post[(blackrespect_w2<98 & blackrespect_w2>-1),race_respect:=blackrespect_w2]
data_post[(aarespect_w2<98 & aarespect_w2>-1) & is.na(race_respect),race_respect:=aarespect_w2]
data_post[(latinorespect_w2<98 & latinorespect_w2>-1) & is.na(race_respect),race_respect:=latinorespect_w2]
data_post[(hispanicrespect_w2<98 & hispanicrespect_w2>-1) & is.na(race_respect),race_respect:=hispanicrespect_w2]

# racial group: voice
data_post[(blackvoice_w2<98 & blackvoice_w2>-1),race_voice:=blackvoice_w2]
data_post[(aavoice_w2<98 & aavoice_w2>-1) & is.na(race_voice),race_voice:=aavoice_w2]
data_post[(latinovoice_w2<98 & latinovoice_w2>-1) & is.na(race_voice),race_voice:=latinovoice_w2]
data_post[(hispanicvoice_w2<98 & hispanicvoice_w2>-1) & is.na(race_voice),race_voice:=hispanicvoice_w2]

# racial group: perspective
data_post[(blackpersp_w2<98 & blackpersp_w2>-1),race_persp:=blackpersp_w2]
data_post[(aapersp_w2<98 & aapersp_w2>-1) & is.na(race_persp),race_persp:=aapersp_w2]
data_post[(latinopersp_w2<98 & latinopersp_w2>-1) & is.na(race_persp),race_persp:=latinopersp_w2]
data_post[(hispanicpersp_w2<98 & hispanicpersp_w2>-1) & is.na(race_persp),race_persp:=hispanicpersp_w2]

# racial group: belonging
data_post[(blackbelong_w2<98 & blackbelong_w2>-1),race_belong:=blackbelong_w2]
data_post[(aabelong_w2<98 & aabelong_w2>-1) & is.na(race_belong),race_belong:=aabelong_w2]
data_post[(latinobelong_w2<98 & latinobelong_w2>-1) & is.na(race_belong),race_belong:=latinobelong_w2]
data_post[(hispanicbelong_w2<98 & hispanicbelong_w2>-1) & is.na(race_belong),race_belong:=hispanicbelong_w2]

# racial indices: respect + voice, and perspective + belonging
data_post[,racial_respect:=race_respect+race_voice]
data_post[,racial_belonging:=race_persp+race_belong]

# party: respect
data_post[(demrespect_w2<98 & demrespect_w2>-1),party_respect:=demrespect_w2]
data_post[(represpect_w2<98 & represpect_w2>-1) & is.na(party_respect),party_respect:=represpect_w2]
data_post[(indrespect_w2<98 & indrespect_w2>-1) & is.na(party_respect),party_respect:=indrespect_w2]
data_post[(othrespect_w2<98 & othrespect_w2>-1) & is.na(party_respect),party_respect:=othrespect_w2]

# party: voice
data_post[(demvoice_w2<98 & demvoice_w2>-1),party_voice:=demvoice_w2]
data_post[(repvoice_w2<98 & repvoice_w2>-1) & is.na(party_voice),party_voice:=repvoice_w2]
data_post[(indvoice_w2<98 & indvoice_w2>-1) & is.na(party_voice),party_voice:=indvoice_w2]
data_post[(othvoice_w2<98 & othvoice_w2>-1) & is.na(party_voice),party_voice:=othvoice_w2]

# party: perspective
data_post[(dempersp_w2<98 & dempersp_w2>-1),party_persp:=dempersp_w2]
data_post[(reppersp_w2<98 & reppersp_w2>-1) & is.na(party_persp),party_persp:=reppersp_w2]
data_post[(indpersp_w2<98 & indpersp_w2>-1) & is.na(party_persp),party_persp:=indpersp_w2]
data_post[(othrpersp_w2<98 & othrpersp_w2>-1) & is.na(party_persp),party_persp:=othrpersp_w2]

# party: belonging
data_post[(dembelong_w2<98 & dembelong_w2>-1),party_belong:=dembelong_w2]
data_post[(repbelong_w2<98 & repbelong_w2>-1) & is.na(party_belong),party_belong:=repbelong_w2]
data_post[(indbelong_w2<98 & indbelong_w2>-1) & is.na(party_belong),party_belong:=indbelong_w2]
data_post[(othrbelong_w2<98 & othrbelong_w2>-1) & is.na(party_belong),party_belong:=othrbelong_w2]

# partisan indices: respect + voice, and perspective + belonging
data_post[,partisan_respect:=party_respect+party_voice]
data_post[,partisan_belonging:=party_persp+party_belong]

# racial group: policy
data_post[(blackpolicy_w2<98 & blackpolicy_w2>-1),race_policy:=blackpolicy_w2]
data_post[(aapolicy_w2<98 & aapolicy_w2>-1) & is.na(race_policy),race_policy:=aapolicy_w2]
data_post[(latinopolicy_w2<98 & latinopolicy_w2>-1) & is.na(race_policy),race_policy:=latinopolicy_w2]
data_post[(hispanicpolicy_w2<98 & hispanicpolicy_w2>-1) & is.na(race_policy),race_policy:=hispanicpolicy_w2]

# racial group: values (the second source column is spelled `aava_w2` here)
data_post[(blackval_w2<98 & blackval_w2>-1),race_value:=blackval_w2]
data_post[(aava_w2<98 & aava_w2>-1) & is.na(race_value),race_value:=aava_w2]
data_post[(latinoval_w2<98 & latinoval_w2>-1) & is.na(race_value),race_value:=latinoval_w2]
data_post[(hispanicval_w2<98 & hispanicval_w2>-1) & is.na(race_value),race_value:=hispanicval_w2]

# party: policy
data_post[(dempolicy_w2<98 & dempolicy_w2>-1),partisan_policy:=dempolicy_w2]
data_post[(reppolicy_w2<98 & reppolicy_w2>-1) & is.na(partisan_policy),partisan_policy:=reppolicy_w2]
data_post[(indpolicy_w2<98 & indpolicy_w2>-1) & is.na(partisan_policy),partisan_policy:=indpolicy_w2]
data_post[(othrpolicy_w2<98 & othrpolicy_w2>-1) & is.na(partisan_policy),partisan_policy:=othrpolicy_w2]

# party: values
data_post[(demval_w2<98 & demval_w2>-1),partisan_value:=demval_w2]
data_post[(repval_w2<98 & repval_w2>-1) & is.na(partisan_value),partisan_value:=repval_w2]
data_post[(indval_w2<98 & indval_w2>-1) & is.na(partisan_value),partisan_value:=indval_w2]
data_post[(othrval_w2<98 & othrval_w2>-1) & is.na(partisan_value),partisan_value:=othrval_w2]

### Data for DID analysis
# Stack the pre- and post-election rows by column name (columns present in only
# one wave are filled with NA) and keep only respondents with a defined
# treatment status.
data_dd <- rbindlist(list(data_pre, data_post), use.names = TRUE, fill = TRUE)
data_dd <- data_dd[!is.na(biden)]



#### Analysis in Manuscript ####

#### Table 1: Effects of a Preferred Candidate Win on PoC Identity Attachments ####

# scale outcomes to [0,1]
# min-max rescaling over all rows of data_dd (both waves pooled), ignoring NAs
to_unit_interval <- function(x) {
  lowest <- min(x, na.rm = TRUE)
  highest <- max(x, na.rm = TRUE)
  (x - lowest) / (highest - lowest)
}
data_dd[, racial_id_scale := to_unit_interval(racial_id)]
data_dd[, national_id_scale := to_unit_interval(national_id)]
data_dd[, partisan_id_scale := to_unit_interval(partisan_id)]

# Main table with partisan attachment (& national, racial attachment as placebo outcomes)
# Every model is the same difference-in-differences specification (post x biden)
# with standard errors clustered by respondent; the *_fe versions add
# respondent fixed effects. The placebo models are restricted to rows with a
# non-missing partisan outcome.
partisan <- lm_robust(
  partisan_id_scale ~ post*biden,
  data = data_dd,
  clusters = id,
  se_type = 'stata'
)

partisan_fe <- lm_robust(
  partisan_id_scale ~ post*biden,
  data = data_dd,
  fixed_effects = ~ id,
  clusters = id,
  se_type = 'stata'
)

national <- lm_robust(
  national_id_scale ~ post*biden,
  data = data_dd,
  subset = !is.na(partisan_id_scale),
  clusters = id,
  se_type = 'stata'
)

national_fe <- lm_robust(
  national_id_scale ~ post*biden,
  data = data_dd,
  subset = !is.na(partisan_id_scale),
  fixed_effects = ~ id,
  clusters = id,
  se_type = 'stata'
)

racial <- lm_robust(
  racial_id_scale ~ post*biden,
  data = data_dd,
  subset = !is.na(partisan_id_scale),
  clusters = id,
  se_type = 'stata'
)

racial_fe <- lm_robust(
  racial_id_scale ~ post*biden,
  data = data_dd,
  subset = !is.na(partisan_id_scale),
  fixed_effects = ~ id,
  clusters = id,
  se_type = 'stata'
)

# pre-election Trump supporters' mean attachment
# One value per outcome, repeated for the model without and with fixed effects
# (six entries, matching the six table columns).
# The mean is restricted to post == 0 so that it is the pre-election mean the
# table note describes; without that filter both waves were pooled.
# NOTE(review): this changes the "Average Trump supporter ID" row relative to
# the pooled version -- confirm against the published table.
means <- rep(
  c(
    data_dd[biden == 0 & post == 0,
            mean(partisan_id_scale, na.rm = TRUE)],
    data_dd[biden == 0 & post == 0 & !is.na(partisan_id_scale),
            mean(national_id_scale, na.rm = TRUE)],
    data_dd[biden == 0 & post == 0 & !is.na(partisan_id_scale),
            mean(racial_id_scale, na.rm = TRUE)]
  ),
  each = 2
)

# Render the six models as one LaTeX table (two columns per outcome) and write
# it to disk. `means` supplies the "Average Trump supporter ID" row.
table1_note <- "\\item The dependent variable is a scaled [0,1] index of identity attachment based on items I identify as... and I am glad to be.... Biden supporter indicates preference for Biden over Trump and post-election after election survey. Individual-clustered standard errors are in parentheses. Average Trump supporter ID is Trump supporters' pre-election mean attachment. ***p < 0.001; ∗∗p < 0.01; ∗p < 0.05; ◦p < 0.1"

table1 <- texreg(
  list(partisan, partisan_fe, national, national_fe, racial, racial_fe),
  caption = "Effects of a Preferred Candidate Win on PoC Identity Attachments",
  caption.above = TRUE,
  custom.header = list('Partisan ID' = 1:2, 'National ID' = 3:4, 'Racial ID' = 5:6),
  custom.model.names = c('(1)', '(2)', '(3)', '(4)', '(5)', '(6)'),
  custom.coef.map = list(
    'biden' = 'Biden supporter',
    'post' = 'Post-election',
    'post:biden' = 'Post election x Biden supporter'
  ),
  custom.gof.rows = list(
    "Average Trump supporter ID" = means,
    "Individual FE" = rep(c('N', 'Y'), 3)
  ),
  include.ci = FALSE,
  include.adjrs = FALSE,
  include.rmse = FALSE,
  stars = c(0.001, 0.01, 0.05, 0.1),
  symbol = "\\circ",
  digits = 3,
  threeparttable = TRUE,
  custom.note = table1_note
)

write(table1, file = './main_result_Biden_Dems_vs_Trump_non_Dems.tex')



#### Analysis in Appendix ####


#### Appendix D: Table D.1 ####
# Re-read the raw merged file and flag who answered wave 2: a respondent counts
# as a wave-2 responder (1) when caseid_w2 is not missing, otherwise 0.
data <- data.table(read_sav('./data/UNCA0002_w2_MERGED_OUTPUT.sav'))
setnames(data, tolower(names(data)))
data[, response_wave_2 := as.numeric(!is.na(caseid_w2))]

# merge with analysis data: left join on caseid, keeping every row of data_dd
data_attrition <- merge(
  data_dd,
  data[, .(caseid, response_wave_2)],
  by = 'caseid',
  all.x = TRUE
)
# income dummy: 1 if faminc_new > 8, 0 otherwise (rows coded 97 or NA stay NA)
data_attrition[faminc_new != 97, income_median := ifelse(faminc_new > 8, 1, 0)]

# compute table D.1 comparing characteristics of respondents who did/did not answer wave 2
# Characteristics compared across the two response groups; the order of this
# vector is also the row order of the final table.
vars <- c(
  'male', 'age', 'white', 'hispanic',
  'high_school', 'unemployment', 'income_median',
  'covid_index',
  'immigrant',
  'coethnic_network',
  'democrat',
  'republican',
  'biden',
  'registered', 'interest', 'swing_state'
)

# Pre-election means of every characteristic by response group. The result is a
# long table with one list-cell per (group, variable) pair.
summary_stats <- data_attrition[
  post == 0,
  .(lapply(.SD, mean, na.rm = TRUE)),
  by = response_wave_2,
  .SDcols = vars
]

# Label each cell with its variable name; this relies on exactly two response
# groups, each holding the variables in the order of `vars`.
summary_stats <- cbind(summary_stats, rep(vars, 2))
names(summary_stats) <- c('wave', 'mean', 'var')
summary_stats$mean <- unlist(summary_stats$mean)

# Reshape to one row per variable with one column per response group, then
# restore the row order of `vars`.
summary_stats <- dcast(
  summary_stats[, .(var, wave, mean)],
  var ~ wave,
  value.var = c('mean')
)
summary_stats <- summary_stats[vars, on = "var"]

# Difference in means between wave-2 responders and non-responders for every
# variable in `vars`, using pre-election rows only. An F test of equal
# variances chooses the t-test flavour: pooled-variance when its p-value
# exceeds 0.1, Welch otherwise.
pre_wave_attrition <- data_attrition[post == 0]

compare_by_response <- function(v) {
  fml <- reformulate("response_wave_2", response = v)
  pooled <- var.test(fml, data = pre_wave_attrition)$p.value > 0.1
  tt <- t.test(fml, data = pre_wave_attrition, var.equal = pooled)
  c(
    difference = unname(tt$estimate[2] - tt$estimate[1]),
    p_value = tt$p.value
  )
}

attrition_tests <- vapply(vars, compare_by_response, numeric(2))
difference <- unname(attrition_tests["difference", ])
p_value <- unname(attrition_tests["p_value", ])

# Append the difference and p-value columns (same row order as `vars`).
summary_stats <- cbind(summary_stats, difference, p_value)
# Add a final 'N' row with the number of pre-election rows in each response
# group; the row is built from a character vector, with empty strings for the
# last two columns.
summary_stats <- rbind(summary_stats, t(c('N', data_attrition[post==0 & response_wave_2==0,.N], data_attrition[post==0 & response_wave_2==1,.N], '', '')),use.names=FALSE)
# Convert every column except `var` to numeric.
# NOTE(review): the empty strings of the 'N' row presumably become NA here
# (with a coercion warning) -- confirm.
summary_stats <- summary_stats[,lapply(.SD, as.numeric),by=var]
colnames(summary_stats) <- c('var', 'did not answer wave 2', 'did answer wave 2', 'difference in means', 'p-value of diff-in-means')

### Print Table D.1
# Build the xtable object. `caption.placement` is an argument of print(), not of
# xtable() (where it was silently ignored), so it is passed to print() below.
table_d1 <- xtable(
  summary_stats,
  caption = "Comparison of average individual characteristics across respondents who did and did not answer the post-election survey wave",
  digits = c(0,0,3,3,3,3),
  align = 'llcccc'
)

# Footnote inserted after the last table row.
comment <- list(
  pos = list(nrow(summary_stats)),
  command = paste0(
    "\\hline\n",
    "{\\footnotesize Notes: The table compares average individual-level characteristics, measured during the pre-election wave, across respondents that did not answer the post-election wave and respondents that did.}\n"
  )
)

print(
  table_d1,
  add.to.row = comment,
  include.rownames = FALSE,
  caption.placement = 'top',
  file = './attrition_Biden_Dems_vs_Trump_nonDems.tex'
)


#### Appendix D: Table D.2 ####
### compute table D.2: checks whether support for Biden explains probability of responding post-election survey
# linear probability model on the pre-election rows only
out1 <- lm_robust(
  response_wave_2 ~ biden,
  data = data_attrition,
  subset = post == 0
)

### print table D.2
table_d2_note <- "\\item Notes: The dependent variable is a dummy that indicates whether the respondent answered the post-election survey. Biden supporter indicates a respondent’s choice for Biden over Trump in the pre-election survey. The estimates are from a linear probability model. ***p < 0.001; ∗∗p < 0.01; ∗p < 0.05"

table_d2 <- texreg(
  out1,
  caption = "Probability of responding the post-election survey by support for Biden",
  caption.above = TRUE,
  custom.header = list('Respondent answered Wave 2' = 1),
  custom.model.names = c('(1)'),
  include.ci = FALSE,
  include.adjrs = FALSE,
  include.rmse = FALSE,
  stars = c(0.001, 0.01, 0.05),
  digits = 3,
  threeparttable = TRUE,
  custom.note = table_d2_note
)

write(table_d2, file = './attrition2_Biden_Dems_vs_Trump_nonDems.tex')

### compute F-statistic reported in Appendix D (survey attrition)
# Interact every pre-election characteristic with the treatment indicator, then
# jointly test that all thirteen interaction coefficients are zero.
out2 <- lm_robust(
  response_wave_2 ~ male*biden + age*biden + white*biden + hispanic*biden +
    high_school*biden + unemployment*biden + income_median*biden +
    covid_index*biden +
    immigrant*biden +
    coethnic_network*biden +
    registered*biden + interest*biden + swing_state*biden,
  data = data_attrition,
  subset = post == 0
)

# Coefficient names as they appear in the fitted model: the first interaction
# is labelled 'male:biden', all later ones 'biden:<variable>'.
interaction_terms <- c(
  'male:biden',
  paste0('biden:', c('age', 'white', 'hispanic',
                     'high_school', 'unemployment', 'income_median',
                     'covid_index', 'immigrant', 'coethnic_network',
                     'registered', 'interest', 'swing_state'))
)

linearHypothesis(out2, paste0(interaction_terms, '=0'),
                 test = 'F', white.adjust = 'hc2')
## F = 1.1861 p-value = 0.2849


#### Appendix E: Figure E.1 ####
## drop respondents that didn't answer the outcome variables in both waves
# keep == 2 means the partisan outcome is observed in both waves for that id
data_dd[, out_not_miss:=ifelse(!is.na(partisan_id_scale),1,0)]
data_dd[, keep:=sum(out_not_miss), by=id]

# Histogram of the scaled partisan attachment by candidate preference, one
# panel per wave, with a vertical line at each group's wave-specific mean.
# NOTE(review): colour and alpha of the mean lines are given inside aes(), so
# ggplot2 maps them through scales instead of using them as literal values.
# They are left unchanged here because moving them out of aes() would alter the
# rendered line colours.
figure_e1 <- data_dd[keep==2] %>%
  ggplot(aes(x=partisan_id_scale, fill=as.character(biden))) +
  geom_histogram( color="#e9ecef", position = 'dodge') +
  labs(fill="") +
  theme_ipsum() +
  xlab("Partisan ID") +
  ylab("Count") +
  scale_x_continuous(breaks=seq(0,1,.2)) +
  scale_fill_discrete(labels = c('Trump', 'Biden')) +
  geom_vline(data = ddply(data_dd[keep==2 & biden==1], "post", summarise, avg = mean(partisan_id_scale)), aes(xintercept=avg, colour= "#F8766D", alpha=0.7), show.legend = F) +
  geom_vline(data = ddply(data_dd[keep==2 & biden==0], "post", summarise, avg = mean(partisan_id_scale)), aes(xintercept=avg, colour= "#00BFC4", alpha=0.7), show.legend = F) +
  facet_wrap(~post, labeller = labeller(post = 
                                          c("0" = "Before election",
                                            "1" = "After election")))
# print Figure E.1
# ggsave() takes the file name first and the plot second; the plot was
# previously passed in the file-name position, which makes the call fail.
ggsave('./partisan_id_distribution.pdf', plot = figure_e1,
       device = cairo_pdf, width=7, height=4.5)

#### Appendix E: Table E.1 ####
### Main table excluding observations that didn't register outcomes in both waves
# For each outcome, flag non-missing rows and count them per respondent; a
# count of 2 means the outcome was recorded in both waves.
data_dd[, out_not_miss := as.numeric(!is.na(partisan_id_scale))]
data_dd[, keep_partisan := sum(out_not_miss), by = id]
data_dd[, out_not_missr := as.numeric(!is.na(racial_id_scale))]
data_dd[, keep_racial := sum(out_not_missr), by = id]
data_dd[, out_not_missn := as.numeric(!is.na(national_id_scale))]
data_dd[, keep_national := sum(out_not_missn), by = id]

# Same six specifications as the main table, each restricted to respondents
# with the relevant outcome observed in both waves.
partisan <- lm_robust(
  partisan_id_scale ~ post*biden,
  data = data_dd,
  subset = keep_partisan == 2,
  clusters = id,
  se_type = 'stata'
)

partisan_fe <- lm_robust(
  partisan_id_scale ~ post*biden,
  data = data_dd,
  subset = keep_partisan == 2,
  fixed_effects = ~ id,
  clusters = id,
  se_type = 'stata'
)

national <- lm_robust(
  national_id_scale ~ post*biden,
  data = data_dd,
  subset = !is.na(partisan_id_scale) & keep_national == 2,
  clusters = id,
  se_type = 'stata'
)

national_fe <- lm_robust(
  national_id_scale ~ post*biden,
  data = data_dd,
  subset = !is.na(partisan_id_scale) & keep_national == 2,
  fixed_effects = ~ id,
  clusters = id,
  se_type = 'stata'
)

racial <- lm_robust(
  racial_id_scale ~ post*biden,
  data = data_dd,
  subset = !is.na(partisan_id_scale) & keep_racial == 2,
  clusters = id,
  se_type = 'stata'
)

racial_fe <- lm_robust(
  racial_id_scale ~ post*biden,
  data = data_dd,
  subset = !is.na(partisan_id_scale) & keep_racial == 2,
  fixed_effects = ~ id,
  clusters = id,
  se_type = 'stata'
)

# pre-election Trump supporters' mean attachment
# One value per outcome, repeated for the model without and with fixed effects
# (six entries, matching the six table columns), computed on the same
# both-waves-observed samples as the models.
# The mean is restricted to post == 0 so that it is the pre-election mean the
# table note describes; without that filter both waves were pooled.
# NOTE(review): this changes the "Average Trump supporter ID" row relative to
# the pooled version -- confirm against the published table.
means <- rep(
  c(
    data_dd[biden == 0 & post == 0 & keep_partisan == 2,
            mean(partisan_id_scale, na.rm = TRUE)],
    data_dd[biden == 0 & post == 0 & !is.na(partisan_id_scale) & keep_national == 2,
            mean(national_id_scale, na.rm = TRUE)],
    data_dd[biden == 0 & post == 0 & !is.na(partisan_id_scale) & keep_racial == 2,
            mean(racial_id_scale, na.rm = TRUE)]
  ),
  each = 2
)

# Render the six restricted-sample models as one LaTeX table and write it to
# disk. `means` supplies the "Average Trump supporter ID" row.
table_e1_note <- "\\item The dependent variable is a scaled [0,1] index of identity attachment based on items I identify as... and I am glad to be.... Biden supporter indicates preference for Biden over Trump and post-election after election survey. Individual-clustered standard errors are in parentheses. Average Trump supporter ID is Trump supporters' pre-election mean attachment. ***p < 0.001; ∗∗p < 0.01; ∗p < 0.05; ◦p < 0.1"

table_e1 <- texreg(
  list(partisan, partisan_fe, national, national_fe, racial, racial_fe),
  caption = "Effects of a Preferred Candidate Win on PoC Identity Attachments",
  caption.above = TRUE,
  custom.header = list('Partisan ID' = 1:2, 'National ID' = 3:4, 'Racial ID' = 5:6),
  custom.model.names = c('(1)', '(2)', '(3)', '(4)', '(5)', '(6)'),
  custom.coef.map = list(
    'biden' = 'Biden supporter',
    'post' = 'Post-election',
    'post:biden' = 'Post election x Biden supporter'
  ),
  custom.gof.rows = list(
    "Average Trump supporter ID" = means,
    "Individual FE" = rep(c('N', 'Y'), 3)
  ),
  include.ci = FALSE,
  include.adjrs = FALSE,
  include.rmse = FALSE,
  stars = c(0.001, 0.01, 0.05, 0.1),
  symbol = "\\circ",
  digits = 3,
  threeparttable = TRUE,
  custom.note = table_e1_note
)

write(table_e1, file = './main_result_Biden_Dems_vs_Trump_non_Dems_drop_incomplete_obs.tex')


#### Appendix E: Table E.2 ####
## Main result on Partisan ID by respondent's race/ethnicity
# The sample is split on the recoded `hispanic` dummy: hispanic == 1 feeds the
# "Latino" columns and hispanic == 0 the "Black" columns. Each subsample gets
# the specification without and with respondent fixed effects.
partisan_hispanic <- lm_robust(
  partisan_id_scale ~ post*biden,
  data = data_dd,
  subset = hispanic == 1,
  clusters = id,
  se_type = 'stata'
)
partisan_fe_hispanic <- lm_robust(
  partisan_id_scale ~ post*biden,
  data = data_dd,
  subset = hispanic == 1,
  fixed_effects = ~ id,
  clusters = id,
  se_type = 'stata'
)

partisan_black <- lm_robust(
  partisan_id_scale ~ post*biden,
  data = data_dd,
  subset = hispanic == 0,
  clusters = id,
  se_type = 'stata'
)

partisan_fe_black <- lm_robust(
  partisan_id_scale ~ post*biden,
  data = data_dd,
  subset = hispanic == 0,
  fixed_effects = ~ id,
  clusters = id,
  se_type = 'stata'
)

table_e2_note <- "\\item The dependent variable is a simple sum index of attachment to partisan identity based on the items I identify as... and I am glad to be... Biden supporter is an indicator variable for preference for Biden over Trump. post-election is an indicator variable for post-election survey wave. Individual-clustered standard errors are reported in parentheses. ***p < 0.001; ∗∗p < 0.01; ∗p < 0.05; ◦p < 0."

table_e2 <- texreg(
  list(partisan_hispanic, partisan_fe_hispanic, partisan_black, partisan_fe_black),
  caption = "Effects of a Preferred Candidate Win on PoC Identity Attachments by Ethnicity",
  caption.above = TRUE,
  custom.header = list('Latino Respondents' = 1:2, 'Black Respondents' = 3:4),
  custom.model.names = c('(1)', '(2)', '(3)', '(4)'),
  custom.coef.map = list(
    'biden' = 'Biden supporter',
    'post' = 'Post-election',
    'post:biden' = 'Post election x Biden supporter'
  ),
  custom.gof.rows = list("Individual FE" = rep(c('N', 'Y'), 2)),
  include.ci = FALSE,
  include.adjrs = FALSE,
  include.rmse = FALSE,
  stars = c(0.001, 0.01, 0.05, 0.1),
  symbol = "\\circ",
  digits = 3,
  threeparttable = TRUE,
  custom.note = table_e2_note
)

write(table_e2, file = './partisan_result_Biden_Dems_vs_Trump_non_Dems_by_ethnicity.tex')

#### Appendix E. Table E.3 ####
## NOTE: run file replicate_table_e3.R in ./code ##

#### Appendix F. Table F.1 ####
# Work on a real copy of the analysis data. A plain `<-` only creates a second
# name for the same data.table, so the `:=` assignments below would also
# overwrite racial_respect inside data_dd.
data_dd_analysis <- copy(data_dd)

## assign median values of scale to pre-election outcomes measured only in second wave
# racial respect was only asked in wave 2, so every pre-election row gets the
# constant 8
data_dd_analysis[post==0, racial_respect:=8]
# min-max rescaling of racial respect to [0,1], ignoring NAs
data_dd_analysis[,racial_respect_scale:=(racial_respect-min(racial_respect,na.rm = TRUE))/(max(racial_respect, na.rm = TRUE)-min(racial_respect, na.rm = TRUE))]

# Three fixed-effects models for the mediation table, all with standard errors
# clustered by respondent.

# effect on outcome controlling for mediator (raw racial respect as control)
partisan_med_racial_respect <- lm_robust(
  partisan_id_scale ~ post*biden + racial_respect,
  data = data_dd_analysis,
  fixed_effects = ~ id,
  clusters = id,
  se_type = 'stata'
)

# effect on mediator (scaled racial respect), rows with a partisan outcome only
racial_respect <- lm_robust(
  racial_respect_scale ~ post*biden,
  data = data_dd_analysis,
  subset = !is.na(partisan_id_scale),
  fixed_effects = ~ id,
  clusters = id,
  se_type = 'stata'
)


# effect on outcome, rows with a racial respect value only
main_partisan_fe <- lm_robust(
  partisan_id_scale ~ post*biden,
  data = data_dd_analysis,
  subset = !is.na(racial_respect),
  fixed_effects = ~ id,
  clusters = id,
  se_type = 'stata'
)

# Render the mediation table (outcome, mediator, outcome controlling for the
# mediator) and write it to disk.
table_f1_note <- "\\item The dependent variables are a scaled [0,1] index of identity attachment (Columns (1), (3)) and of racial respect (Column (2)). Biden supporter indicates preference for Biden over Trump and post-election after election survey. Racial Respect is the raw measure of racial respect (used as control variable in Column (3). Individual-clustered standard errors are in parentheses. The sample includes observations from respondents that registered an answer for the questions on expressive partisanship, and racial respect. ***p < 0.001; ∗∗p < 0.01; ∗p < 0.05; ◦p < 0"

table_f1 <- texreg(
  list(main_partisan_fe, racial_respect, partisan_med_racial_respect),
  caption = "Observational Mediation Analysis via Racial Respect",
  caption.above = TRUE,
  custom.header = list('Partisan ID' = 1, 'Racial Respect' = 2, 'Partisan ID' = 3),
  custom.model.names = c('(1)', '(2)', '(3)'),
  custom.coef.map = list(
    'biden' = 'Biden supporter',
    'post' = 'Post-election',
    'post:biden' = 'Post election x Biden supporter',
    'racial_respect' = 'Racial respect'
  ),
  custom.gof.rows = list("Individual FE" = rep('Y', 3)),
  include.ci = FALSE,
  include.adjrs = FALSE,
  include.rmse = FALSE,
  stars = c(0.001, 0.01, 0.05, 0.1),
  symbol = "\\circ",
  digits = 3,
  threeparttable = TRUE,
  custom.note = table_f1_note
)

write(table_f1, file = './mediation_analysis_racial_respect_Dems_nonDems.tex')





